import httpx
import json
import re


class DietDetailCrawler:
    """Crawl one diet-category detail page from the Changsha service API.

    Fetches the JSON document at *url*, extracts the category name, the
    cleaned introduction text, the feature labels and the recommended
    shop names, and appends the parsed record to ``../data/data1.json``.
    """

    def __init__(self, url):
        # API endpoint for a single diet category (JSON response).
        self.url = url
        self.headers = {
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 MicroMessenger/7.0.20.1781(0x6700143B) NetType/WIFI MiniProgramEnv/Windows WindowsWechat/WMPF WindowsWechat(0x6309092b) XWEB/9053',
        }
        # NOTE(review): verify=False disables TLS certificate checking —
        # presumably needed for this site, but worth confirming.
        self.session = httpx.Client(http2=True, verify=False)
        self.json_data = {}   # raw decoded JSON payload from the API
        self.parse_dit = {}   # parsed record built by parse_data()

    def get_data(self):
        """Request the detail page and store the decoded JSON payload."""
        response = self.session.get(url=self.url, headers=self.headers)
        # Fail fast on HTTP errors instead of trying to JSON-decode an
        # error page, which would raise a confusing decode exception.
        response.raise_for_status()
        self.json_data = response.json()

    def parse_data(self):
        """Extract name, introduction, feature labels and shop list."""
        name = self.json_data['data']['cateName']
        introduce = self.json_data['data']['introduce']
        # Keep only CJK characters and common Chinese punctuation,
        # stripping markup/whitespace/latin noise from the raw text.
        introduce = re.sub(r"[^\u4e00-\u9fa5，。、；！]", "", introduce)

        synopsis = self.json_data['data']['synopsis']
        # Comma-joined label names with the synopsis appended.
        label = ",".join([item['labelName'] for item in self.json_data['data']['labelList']]) + synopsis
        rank_list = [item['shopName'] for item in self.json_data['data']['rankList']]
        dit = {
            'name': name,
            'introduce': introduce,
            'feature': label,
            'recommended_shop': rank_list
        }
        self.parse_dit = dit
        print(name)

    def save_data(self):
        """Append the parsed record to the data file.

        NOTE(review): appending ``record,\\n`` repeatedly does not produce
        a valid JSON document on its own — the consumer presumably wraps
        the file contents in brackets; verify downstream.
        """
        with open('../data/data1.json', 'a+', encoding='utf-8') as f:
            json.dump(self.parse_dit, f, ensure_ascii=False, indent=4)
            f.write(',\n')

    def close(self):
        """Release the HTTP client's connection pool."""
        self.session.close()

    def run(self):
        """Fetch, parse and persist one detail page, then clean up."""
        try:
            self.get_data()
            self.parse_data()
            self.save_data()
        finally:
            # Original code leaked the httpx.Client; always close it.
            self.close()


def main(url):
    """Crawl a single diet-detail page at *url* and persist the result."""
    DietDetailCrawler(url).run()


if __name__ == '__main__':
    # Delegate to main() instead of duplicating its body inline —
    # the original left main() defined but never called.
    url = "https://wlfw.changsha.gov.cn/api/service/content/contentcate/1275721376908443650"
    main(url)


